
*------------------------------------------------------------------------------**------------------------------------------------------------------------------*
* Import CoinMarketCap data *
*------------------------------------------------------------------------------**------------------------------------------------------------------------------*

* Load the raw CoinMarketCap top-100 CSV, clean it, and save the result as
* data/working/working_coinmarketcap.dta. Every currency is kept here; the
* bitcoin-only filter stays disabled at this stage.
import delimited "data/source/coinmarketcap_top100_updated.csv", clear

*keep if currency == "bitcoin"

* Convert marketcap to numeric. "-" characters are stripped before the
* conversion, so a value consisting only of "-" ends up missing
* (presumably the source's placeholder for "no data" - confirm).
destring marketcap, replace ignore("-")

* Implied supply = market cap / opening price.
* Stored as double: the default float type keeps only about 7 significant
* digits, which rounds supplies in the millions/billions.
gen double supply = marketcap / open

* Express volume in millions of its original unit.
replace volume = volume / 1000000

* Replace the string date (parsed as month-day-year) with a Stata daily
* date that keeps the name "date".
gen date2 = date(date, "MDY")
format date2 %td
drop date
rename date2 date

save "data/working/working_coinmarketcap.dta", replace

* Collapse the daily data currently in memory to a monthly bitcoin series.
drop if currency != "bitcoin"

* Monthly period identifier taken directly from the daily date.
gen ym_date = mofd(date)
format %tm ym_date

* One row per month: mean closing price and summed volume.
collapse (mean) close (sum) volume, by(ym_date)

save "data/working/working_coinmarketcap_monthly.dta", replace


*----- Build the cleaned price dataset
* First row supplies the variable names; floating-point columns are read as double.
import delimited "data/source/coinmarketcap_top100_updated.csv", varnames(1) asdouble clear
rename currency currency_id

* Swap the hyphen for an underscore in the two hyphenated identifiers only
replace currency_id = subinstr(currency_id, "-", "_", .) if inlist(currency_id, "bitcoin-cash", "bytecoin-bcn")

* Date: keep the raw string as date_o and put the parsed daily date first
rename date date_o
gen date = date(date_o, "MDY")
format %td date
order date, first

* Market cap: force a numeric conversion (non-numeric entries become missing)
destring marketcap, replace force
save "data/working/coinmarketcap",replace



*------------------------------------------------------------------------------**------------------------------------------------------------------------------*
* Import blockchain data *
*------------------------------------------------------------------------------**------------------------------------------------------------------------------*


*------------------------------------------------------------------------------*
* Addresses *
*------------------------------------------------------------------------------*

* Build a daily series of unique addresses from the raw CSV.
import delimited "data/source/n-unique-addresses.csv", clear

* Date pieces are cut from fixed positions of v1: characters 1-4 (year),
* 6-7 (month) and 9-10 (day). Assumes a YYYY-MM-DD style layout - TODO confirm.
gen yr = substr(v1, 1, 4)
gen mo = substr(v1, 6, 2)
gen dy = substr(v1, 9, 2)
destring yr mo dy, replace

gen date = mdy(mo, dy, yr)
format %td date

* v2 is not used. v3 is split on commas and its second piece is taken as
* the address count.
drop v2
split v3, parse(",")
destring v32, replace

keep date v32
rename v32 num_uniqueaddress_BTC

sort date
save "data/working/working_blockchain_uniqueaddresses.dta", replace 


*------------------------------------------------------------------------------*
* Prices *
*------------------------------------------------------------------------------*

* Build a daily market-price series, plus log(1 + price), from the raw CSV.
import delimited "data/source/market-price.csv", clear

rename v2 marketprice

* Break v1 on spaces and slashes; the first three pieces are used as
* month, day and year.
split v1, parse(" " "/")
destring v11 v12, generate(mm dd)

* The year piece gets a "20" prefix before conversion
* (assumes two-digit years from 2000 onwards - confirm against the raw file).
gen yr_txt = "20" + v13
destring yr_txt, generate(yyyy)

gen date = mdy(mm, dd, yyyy)
format %td date

keep date marketprice
gen marketprice_log = ln(1 + marketprice)

sort date
save "data/working/working_blockchain_marketprice.dta", replace 



